Data Visualization (Seaborn)

In [510]:
#importing seaborn
In [511]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline

"""

NUMERICAL DATA PLOT :

--relplot() --scatterplot() --lineplot()

CATEGORICAL DATA PLOT :

--catplot() --boxplot() --stripplot() --swarmplot() --etc...

VISUALIZING DISTRIBUTION OF DATA :

--distplot() --kdeplot() --jointplot() --rugplot()

LINEAR REGRESSION & RELATIONSHIP:

--regplot() --lmplot()

CONTROLLING PLOTTED FIGURES AESTHETICS :

--figure styling --axes styling --color styling --etc ....
"""

In [513]:
# NUMERICAL DATA PLOT :

#-relplot()

#-scatterplot()

#-lineplot()
In [514]:
#NUMERIC
sns.set(style="darkgrid")
tips=sns.load_dataset('tips')
tips.head()
Out[514]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
In [515]:
#RELPLOT() [RELATIONAL PLOT]
sns.relplot(x="total_bill",y='tip',data=tips) #RELATIONAL PLOT BETWEEN TWO VARIABLES.
Out[515]:
<seaborn.axisgrid.FacetGrid at 0x16e34a000c8>
In [516]:
dir(sns.FacetGrid) #OPERATIONS THAT CAN BE PERFORMED.
Out[516]:
['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_bottom_axes',
 '_clean_axis',
 '_facet_color',
 '_facet_plot',
 '_finalize_grid',
 '_get_palette',
 '_inner_axes',
 '_left_axes',
 '_legend_out',
 '_margin_titles',
 '_not_bottom_axes',
 '_not_left_axes',
 '_update_legend_data',
 'add_legend',
 'ax',
 'despine',
 'facet_axis',
 'facet_data',
 'map',
 'map_dataframe',
 'savefig',
 'set',
 'set_axis_labels',
 'set_titles',
 'set_xlabels',
 'set_xticklabels',
 'set_ylabels',
 'set_yticklabels']
In [517]:
sns.relplot(x='total_bill',y='tip',data=tips,hue='smoker') #hue for categorizing variables
Out[517]:
<seaborn.axisgrid.FacetGrid at 0x16e34a34b48>
In [518]:
sns.relplot(x='total_bill',y='tip',data=tips,hue='smoker',style='time') #style for representation of categories
Out[518]:
<seaborn.axisgrid.FacetGrid at 0x16e34a72f08>
In [519]:
sns.relplot(x='total_bill',y='tip',hue='smoker',style='time',data=tips,palette='ch:r=-0.9,l=0.75') #lightining and Darkening
Out[519]:
<seaborn.axisgrid.FacetGrid at 0x16e34ace748>
In [520]:
sns.relplot(x='total_bill',y='tip',data=tips,size='size',hue='sex',style='day') #size Changes size
Out[520]:
<seaborn.axisgrid.FacetGrid at 0x16e35b580c8>
In [521]:
sns.relplot(x='total_bill',y='tip',data=tips,size='size',sizes=(15,200),hue='sex',style='time',palette='ch:r=-0.9,l=0.75')
#sizes range
Out[521]:
<seaborn.axisgrid.FacetGrid at 0x16e35bdbf88>
In [522]:
from numpy.random import randn
df=pd.DataFrame(dict(time=np.arange(500),value=randn(500).cumsum()))#CUMMILATIVE SUM
In [523]:
df.head()
Out[523]:
time value
0 0 -0.043407
1 1 0.448605
2 2 -0.948882
3 3 -2.698156
4 4 -3.864177
In [524]:
sns.relplot(x='time',y='value',kind='line',data=df)
Out[524]:
<seaborn.axisgrid.FacetGrid at 0x16e35bb5c48>
In [525]:
sns.relplot(x='time',y='value',kind='scatter',data=df,hue='value')
Out[525]:
<seaborn.axisgrid.FacetGrid at 0x16e35ce91c8>
In [526]:
df=pd.DataFrame(randn(500,2).cumsum(axis=0),columns=['time','value'])
df.head()
Out[526]:
time value
0 0.678582 -1.098708
1 -0.164481 -1.436054
2 0.292363 -1.922904
3 2.485009 -1.500831
4 3.093593 -2.565654
In [527]:
sns.relplot(x='time',y='value',kind='line',data=df,sort=False)
Out[527]:
<seaborn.axisgrid.FacetGrid at 0x16e35d9e988>
In [528]:
sns.relplot(x='time',y='value',kind='line',data=df,sort=True)
Out[528]:
<seaborn.axisgrid.FacetGrid at 0x16e35e0f3c8>
In [529]:
#Repeated Measures data
fmri=sns.load_dataset('fmri')
#MULTIPLE VALUES FOR SAME TIME POINT
fmri.head()
Out[529]:
subject timepoint event region signal
0 s13 18 stim parietal -0.017552
1 s5 14 stim parietal -0.080883
2 s12 18 stim parietal -0.081033
3 s11 18 stim parietal -0.046134
4 s10 18 stim parietal -0.037970
In [530]:
sns.relplot(x='timepoint',y='signal',kind='line',data=fmri)#ci confidence interval is true
Out[530]:
<seaborn.axisgrid.FacetGrid at 0x16e35e63e08>
In [531]:
sns.relplot(x='timepoint',y='signal',kind='line',data=fmri,ci=False)#ci confidence interval is false
Out[531]:
<seaborn.axisgrid.FacetGrid at 0x16e35edc508>
In [532]:
sns.relplot(x='timepoint',y='signal',kind='line',data=fmri,ci='sd')#ci confidence interval
Out[532]:
<seaborn.axisgrid.FacetGrid at 0x16e35f49088>
In [533]:
sns.relplot(x='timepoint',y='signal',kind='line',estimator=None,data=fmri)
#ci confidence interval removed.
#i.e Estimator is None ..
Out[533]:
<seaborn.axisgrid.FacetGrid at 0x16e35c17308>
In [534]:
sns.relplot(x='timepoint',y='signal',kind='line',data=fmri,ci='sd',hue='event')#ci confidence interval
Out[534]:
<seaborn.axisgrid.FacetGrid at 0x16e36fefe88>
In [535]:
sns.relplot(x='timepoint',y='signal',hue='region',style='event',kind='line',data=fmri,markers=True,dashes=False)
Out[535]:
<seaborn.axisgrid.FacetGrid at 0x16e35c071c8>
In [536]:
sns.relplot(x='timepoint',y='signal',hue='region',style='region',kind='line',data=fmri,markers=True,dashes=False)
Out[536]:
<seaborn.axisgrid.FacetGrid at 0x16e37100d48>
In [537]:
sns.relplot(x='timepoint',y='signal',hue='region',style='event',kind='line',data=fmri,markers=True,dashes=True)
Out[537]:
<seaborn.axisgrid.FacetGrid at 0x16e37194908>
In [538]:
sns.relplot(x='timepoint',y='signal',hue='event',style='event',kind='line',data=fmri) 
#Differentiating the same variable (event) with both color (hue) and line style
Out[538]:
<seaborn.axisgrid.FacetGrid at 0x16e3716f688>
In [539]:
sns.relplot(x='timepoint',y='signal',hue='event',units='subject',estimator=None,kind='line',data=fmri)
Out[539]:
<seaborn.axisgrid.FacetGrid at 0x16e372aca08>
In [540]:
sns.relplot(x='timepoint',y='signal',hue='region',units='subject',estimator=None,kind='line',data=fmri.query("event=='stim'"))
#Querying the required data.
Out[540]:
<seaborn.axisgrid.FacetGrid at 0x16e373a6348>
In [541]:
dots=sns.load_dataset("dots").query("align=='dots'") #querying only dots out
dots.head()
Out[541]:
align choice time coherence firing_rate
0 dots T1 -80 0.0 33.189967
1 dots T1 -80 3.2 31.691726
2 dots T1 -80 6.4 34.279840
3 dots T1 -80 12.8 32.631874
4 dots T1 -80 25.6 35.060487
In [542]:
sns.relplot(x='time',y='firing_rate',data=dots,kind='line',hue='coherence',style='choice')
Out[542]:
<seaborn.axisgrid.FacetGrid at 0x16e37482148>
In [543]:
palette=sns.cubehelix_palette(light=0.5,n_colors=6) 
#here we are giving different colors to coherence which has 6 categories
sns.relplot(x='time',y='firing_rate',data=dots,kind='line',hue='coherence',style='choice',palette=palette)
Out[543]:
<seaborn.axisgrid.FacetGrid at 0x16e374c4e08>
In [544]:
sns.relplot(x='time',y='firing_rate',hue='coherence',size='coherence',style='choice',kind='line',data=dots,sizes=(1,5))
Out[544]:
<seaborn.axisgrid.FacetGrid at 0x16e38574848>
In [545]:
df=pd.DataFrame(dict(time=pd.date_range('2019-06-02',periods=500),value=randn(500).cumsum()))
df.head()
Out[545]:
time value
0 2019-06-02 0.431948
1 2019-06-03 0.062967
2 2019-06-04 1.110783
3 2019-06-05 -0.100068
4 2019-06-06 0.440896
In [546]:
g=sns.relplot(x='time',y='value',kind='line',data=df)
#Dates are not clearly visible
In [547]:
g=sns.relplot(x='time',y='value',kind='line',data=df)
g.fig.autofmt_xdate()
In [548]:
#USING FACETGRID YOU CAN DRAW MULTIPLE PLOTS.
In [549]:
#LETS CHECK OUT TIPS DATASET.
In [550]:
tips.head()
Out[550]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
In [551]:
sns.relplot(x='total_bill',y='tip',hue='smoker',data=tips)

#HERE WE HAVE ONE COMBINED PLOT. LET'S SPLIT IT INTO MULTIPLE PLOTS USING FACETGRID.
Out[551]:
<seaborn.axisgrid.FacetGrid at 0x16e387a8508>
In [552]:
sns.relplot(x='total_bill',y='tip',hue='smoker',col='smoker',data=tips) 
#WE categorised combined plot into two individual plots
Out[552]:
<seaborn.axisgrid.FacetGrid at 0x16e38693908>
In [553]:
#Similarly
sns.relplot(x='total_bill',y='tip',hue='smoker',col='time',data=tips) 
Out[553]:
<seaborn.axisgrid.FacetGrid at 0x16e388a50c8>
In [554]:
sns.relplot(x='total_bill',y='tip',hue='smoker',col='size',data=tips)
Out[554]:
<seaborn.axisgrid.FacetGrid at 0x16e3898cec8>
In [555]:
sns.relplot(x='total_bill',y='tip',hue='smoker',row='size',data=tips)#GIVES VERTICALLY ROW WISE.
Out[555]:
<seaborn.axisgrid.FacetGrid at 0x16e3914b9c8>
In [556]:
sns.relplot(x='timepoint',y='signal',hue='subject',col='region',row='event',height=3,kind='line',ci='sd',data=fmri)
#GIVES 2 CROSS 2
#HERE COL WISE CATEGORIZATION
#HERE ROW WISE CATEGORIZATION
Out[556]:
<seaborn.axisgrid.FacetGrid at 0x16e393508c8>
In [557]:
sns.relplot(x='total_bill',y='tip',hue='smoker',col='size',data=tips,col_wrap=3,height=3)
#'size' HAS 6 CATEGORIES, SO TO GET A 2 x 3 GRID OF PLOTS WE PASS COL_WRAP=3
Out[557]:
<seaborn.axisgrid.FacetGrid at 0x16e3abfd348>
In [558]:
#LETS SEE LINE PLOT NOW FOR NUMERICAL DATA.
sns.lineplot(x='total_bill',y='tip',data=tips)
Out[558]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3b41ef08>
In [559]:
#NOW LETS SEE A SCATTER PLOT
sns.scatterplot(x='total_bill',y='tip',data=tips)
Out[559]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3b5954c8>
In [560]:
sns.lineplot(x='timepoint',y='signal',data=fmri)
Out[560]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3b5f0248>
In [561]:
sns.lineplot(x='timepoint',y='signal',hue='event',style='event',markers=True,ci='sd',data=fmri)
Out[561]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3b669bc8>
In [562]:
sns.lineplot(x='timepoint',y='signal',hue='event',style='event',markers=True,ci='sd',data=fmri,err_style='bars')
Out[562]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3b6f42c8>
In [563]:
sns.lineplot(x='timepoint',y='signal',hue='event',style='event',markers=True,ci=68,data=fmri,err_style='bars')
Out[563]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3b766308>
In [564]:
sns.lineplot(x='timepoint',y='signal',hue='region',style='event',markers=True,ci=68,data=fmri,err_style='bars')
#err_style='bars' represents the estimated error as bars; here ci is given as a number (68).
Out[564]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3b7ff188>
In [565]:
sns.lineplot(x='time',y='firing_rate',hue='coherence',style='choice',data=dots)#same using line plot
Out[565]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3b8b7988>
In [566]:
sns.scatterplot(x='total_bill',y='tip',data=tips,hue='smoker',size='size',style='time')#similar we got in relplot
Out[566]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3b9aa0c8>
In [567]:
#lets LOAD IRIS DATA AND DRAW SCatter plot
In [568]:
iris=sns.load_dataset('iris')
iris.head()
sns.scatterplot(x='sepal_length',y='petal_length',data=iris) #BOTH ARE SAME.
Out[568]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3ba2aa08>
In [569]:
sns.scatterplot(x=iris['sepal_length'],y=iris['petal_length']) #ANOTHER WAY.
Out[569]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3ba96b88>
In [570]:
#CATEGORICAL DATA REPRESENTATION.
#CATEGORICAL DATA PLOT :
# --catplot()
 #--boxplot()
 #--stripplot()
 #--swarmplot()
 #--etc...
In [571]:
#LETS TAKE TIPS DATA
In [572]:
tips.head()
Out[572]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
In [573]:
sns.catplot(x='day',y='total_bill',data=tips) #DEFAULTLY JITTER IS TRUE
Out[573]:
<seaborn.axisgrid.FacetGrid at 0x16e3badd088>
In [574]:
sns.catplot(x='day',y='total_bill',data=tips,jitter=False)#TO REMOVE JITTER AND GET A SINGLE LINE,JITTER =FALSE
Out[574]:
<seaborn.axisgrid.FacetGrid at 0x16e3bafaec8>
In [575]:
sns.catplot(x='day',y='total_bill',data=tips,kind='swarm')#like a swarm it differenshiates
Out[575]:
<seaborn.axisgrid.FacetGrid at 0x16e3bb95b48>
In [576]:
sns.catplot(x='day',y='total_bill',data=tips,kind='swarm',hue='size')
Out[576]:
<seaborn.axisgrid.FacetGrid at 0x16e3cbea888>
In [577]:
sns.catplot(x='day',y='total_bill',data=tips,kind='swarm',hue='sex')
Out[577]:
<seaborn.axisgrid.FacetGrid at 0x16e385ee688>
In [578]:
sns.catplot(x='smoker',y='tip',data=tips,order=['No','Yes'])
Out[578]:
<seaborn.axisgrid.FacetGrid at 0x16e35f3c748>
In [579]:
tips.head()
Out[579]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
In [580]:
sns.catplot(x='day',y='total_bill',kind='box',hue='sex',data=tips) #BOX PLOTS FOR CATEGORICAL DATA
Out[580]:
<seaborn.axisgrid.FacetGrid at 0x16e386b5bc8>
In [581]:
sns.catplot(x='day',y='total_bill',kind='box',hue='time',data=tips)
Out[581]:
<seaborn.axisgrid.FacetGrid at 0x16e2f858f08>
In [582]:
#TO COMBINE THE SUB-CATEGORY BOXES INTO ONE BOX POSITION PER CATEGORY USE DODGE=FALSE
In [583]:
sns.catplot(x='day',y='total_bill',kind='box',hue='time',data=tips,dodge=False)
Out[583]:
<seaborn.axisgrid.FacetGrid at 0x16e2a7e8488>
In [584]:
sns.catplot(x='day',y='total_bill',kind='box',hue='sex',data=tips,dodge=False) #similarly 
Out[584]:
<seaborn.axisgrid.FacetGrid at 0x16e20807108>
In [585]:
diamonds=sns.load_dataset('diamonds')
diamonds.head()
Out[585]:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
In [586]:
sns.catplot(x='color',y='price',kind='boxen',data=diamonds.sort_values('color'))#BOXEN MAKES QUARTILE INTO BOXES
Out[586]:
<seaborn.axisgrid.FacetGrid at 0x16e297a2688>
In [587]:
sns.catplot(x='day',y='total_bill',kind='boxen',data=tips,dodge=False)
Out[587]:
<seaborn.axisgrid.FacetGrid at 0x16e297dee48>
In [588]:
sns.catplot(x='total_bill',y='day',hue='sex',kind='violin',data=tips,split=False,inner='stick')
#HERE SPLIT IS SIMILAR TO DODGE
#INNER STICK DRAWS LINES INPLOT
#VIOLIN IS SIMILAR TO BOXPLOT, BUT THE REPRESENTATION IS IN THE FORM OF A VIOLIN (OR) GUITAR
#inside violin plot we have boxplot
Out[588]:
<seaborn.axisgrid.FacetGrid at 0x16e2a9cdb48>
In [589]:
sns.catplot(x='total_bill',y='day',hue='sex',kind='violin',data=tips,split=False)#AS WE REMOVED STICK WE GET BOXPLOT
Out[589]:
<seaborn.axisgrid.FacetGrid at 0x16e29140688>
In [590]:
sns.catplot(x='total_bill',y='day',hue='sex',kind='violin',data=tips,split=True,inner='stick')
#split is true it combines it.
Out[590]:
<seaborn.axisgrid.FacetGrid at 0x16e3d14ce08>
In [591]:
sns.catplot(x='total_bill',y='day',hue='time',kind='violin',data=tips,split=False,inner='stick')
Out[591]:
<seaborn.axisgrid.FacetGrid at 0x16e3d7c4188>
In [592]:
sns.catplot(x='total_bill',y='day',hue='time',kind='violin',data=tips,split=True,inner='stick')
Out[592]:
<seaborn.axisgrid.FacetGrid at 0x16e3db6aec8>
In [593]:
#SWARM and VIOLIN plot together
In [594]:
g=sns.catplot(x='day',y='total_bill',kind='violin',inner=None,data=tips)
sns.swarmplot(x='day',y='total_bill',color='k',size=3,data=tips,ax=g.ax) 
#HERE WE COMBINED BOTH THE PLOTS SWARM AND VIOLIN PLOT
Out[594]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3e1f2088>
In [595]:
titanic=sns.load_dataset("titanic")
titanic.head()
Out[595]:
survived pclass sex age sibsp parch fare embarked class who adult_male deck embark_town alive alone
0 0 3 male 22.0 1 0 7.2500 S Third man True NaN Southampton no False
1 1 1 female 38.0 1 0 71.2833 C First woman False C Cherbourg yes False
2 1 3 female 26.0 0 0 7.9250 S Third woman False NaN Southampton yes True
3 1 1 female 35.0 1 0 53.1000 S First woman False C Southampton yes False
4 0 3 male 35.0 0 0 8.0500 S Third man True NaN Southampton no True
In [596]:
sns.catplot(x='sex',y='survived',hue='class',kind='bar',data=titanic)
#IN A BAR PLOT, A LONG STRAIGHT LINE ON TOP OF A BAR MEANS HIGH UNCERTAINTY
Out[596]:
<seaborn.axisgrid.FacetGrid at 0x16e3e269e88>
In [597]:
sns.catplot(x='deck',kind='count',palette='ch:0.25',data=titanic,hue='class')
Out[597]:
<seaborn.axisgrid.FacetGrid at 0x16e3e260e88>
In [598]:
sns.catplot(x='sex',y='survived',hue='class',kind='bar',data=titanic,palette='ch:0.25')
#here we can see difference between bar and point
#A BIGGER LINE ABOVE THE BAR MEANS HIGHER UNCERTAINTY
#IF IT IS SMALLER THEN THERE IS LESS UNCERTAINTY
Out[598]:
<seaborn.axisgrid.FacetGrid at 0x16e3e2e9808>
In [599]:
sns.catplot(x='sex',y='survived',hue='class',kind='point',data=titanic,palette='ch:0.25')
Out[599]:
<seaborn.axisgrid.FacetGrid at 0x16e3e410048>
In [600]:
#VISUALIZING THE DISTRIBUTION OF THE DATA
 #--distplot()
 #--kdeplot()
 #--jointplot()
 #--rugplot()
In [601]:
#UNIVARIATE DISTRIBUTION: (DISTRIBUTION PLOT)
x=randn(100)
sns.distplot(x)
Out[601]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3e4a0088>
In [602]:
sns.distplot(x,kde=False)
Out[602]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3e524e08>
In [603]:
sns.distplot(x,kde=False,rug=True)
#RUG DRAWS LINES AT THE BOTTOM
Out[603]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3e4a69c8>
In [604]:
sns.distplot(x,kde=False,rug=True,bins=30)
#RUG DRAWS LINES AT THE BOTTOM
Out[604]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3e5df948>
In [605]:
sns.distplot(x,kde=True,hist=False,rug=False,bins=30)
Out[605]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3e681208>
In [606]:
sns.kdeplot(x,shade=True)
Out[606]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3e729a08>
In [607]:
sns.kdeplot(x,shade=True,cbar=True,kernel='gau',bw='scott',cumulative=True)
Out[607]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3e7ae388>
In [608]:
sns.kdeplot(x,shade=True,cbar=True,bw=0.2,cut=10)#bw(Bnadwidth)
Out[608]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3e80c408>
In [609]:
sns.kdeplot(x,shade=True,cbar=True,bw=1,cut=0)#bw(Bnadwidth)[it gives smooth curve]
Out[609]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e3e87c488>
In [610]:
#Bivariate Distribution
In [611]:
tips.head()
Out[611]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
In [612]:
x=tips['total_bill']
y=tips['tip']
sns.set()
sns.jointplot(x='total_bill',y=y,data=tips,height=6,space=0.2,kind='hex',color='r',marginal_kws=dict(bins=15,rug=True),annot_kws=dict(stat='r'),edgecolor="w",linewidth=1)
Out[612]:
<seaborn.axisgrid.JointGrid at 0x16e3f929948>
In [613]:
sns.jointplot(x=x,y=y,data=tips)
Out[613]:
<seaborn.axisgrid.JointGrid at 0x16e3fa8fd08>
In [614]:
# Hexbin joint plot of total_bill (x) against tip (y).
sns.jointplot(x=x,y=y,kind='hex')
# NOTE(review): called on its own, axes_style('white') only returns the style
# parameters (the dict displayed as this cell's output); it is not used as a
# `with` context here, so it presumably does not restyle the figure drawn above.
sns.axes_style('white')
Out[614]:
{'figure.facecolor': 'white',
 'axes.labelcolor': '.15',
 'xtick.direction': 'out',
 'ytick.direction': 'out',
 'xtick.color': '.15',
 'ytick.color': '.15',
 'axes.axisbelow': True,
 'grid.linestyle': '-',
 'text.color': '.15',
 'font.family': ['sans-serif'],
 'font.sans-serif': ['Arial',
  'DejaVu Sans',
  'Liberation Sans',
  'Bitstream Vera Sans',
  'sans-serif'],
 'lines.solid_capstyle': 'round',
 'patch.edgecolor': 'w',
 'patch.force_edgecolor': True,
 'image.cmap': 'rocket',
 'xtick.top': False,
 'ytick.right': False,
 'axes.grid': False,
 'axes.facecolor': 'white',
 'axes.edgecolor': '.15',
 'grid.color': '.8',
 'axes.spines.left': True,
 'axes.spines.bottom': True,
 'axes.spines.right': True,
 'axes.spines.top': True,
 'xtick.bottom': False,
 'ytick.left': False}
In [615]:
# axes_style() by itself only returns a dict of style parameters; using it as a
# context manager applies the 'white' style while this figure is being drawn.
with sns.axes_style('white'):
    sns.jointplot(x=x,y=y,kind='kde')
# Reset seaborn to its default theme for the following cells.
sns.set()
In [616]:
#USE ALONG SIDE KDE PLOT ONLY

f,ax=plt.subplots(figsize=(6,6))
cmap=sns.cubehelix_palette(as_cmap=True,dark=0,light=1,reverse=True)
sns.kdeplot(x,y,cmap=cmap,n_levels=60,shade=True)
Out[616]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e40e056c8>
In [617]:
# Pass the data vectors by keyword (as the earlier jointplot cells do);
# newer seaborn releases reject positional x/y.
g=sns.jointplot(x=x,y=y,kind='kde',color='m')
In [618]:
# Pass the data vectors by keyword (as the earlier jointplot cells do);
# newer seaborn releases reject positional x/y.
g=sns.jointplot(x=x,y=y,kind='kde',color='m')
# Overlay the raw observations as white '+' markers on the joint axes.
g.plot_joint(plt.scatter,c='w',s=30,linewidth=1,marker='+')
# Make the first collection on the joint axes fully transparent.
g.ax_joint.collections[0].set_alpha(0)
In [619]:
#MULTIVARIATE
#Pairplot(Multipleplot)
sns.pairplot(iris)
#HERE WE GET NON DIAGONAL SCATTER PLOT
#&DIAGONAL UNIVARIATE BAR GRAPH
Out[619]:
<seaborn.axisgrid.PairGrid at 0x16e41167f48>
In [620]:
#to change this in pairplot
#HERE WE GET NON DIAGONAL SCATTER PLOT
#&DIAGONAL UNIVARIATE BAR GRAPH
g=sns.PairGrid(iris)
g.map_diag(sns.kdeplot)
g.map_offdiag(sns.kdeplot,n_levels=10)
#MULTIVARIATE ANALYSIS
Out[620]:
<seaborn.axisgrid.PairGrid at 0x16e40efdb88>
In [621]:
#LINEAR REGRESSION AND RELATIONSHIP related figures.
sns.regplot(x='total_bill',y='tip',data=tips)
Out[621]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e43506f88>
In [622]:
#ANOTHER WAY
sns.lmplot(x='total_bill',y='tip',data=tips)
Out[622]:
<seaborn.axisgrid.FacetGrid at 0x16e43755408>
In [623]:
sns.lmplot(x='size',y='tip',data=tips,x_jitter=0.05) #X AXIS JITTER
Out[623]:
<seaborn.axisgrid.FacetGrid at 0x16e43558d48>
In [624]:
sns.lmplot(x='size',y='tip',data=tips,x_estimator=np.mean)
#IT CALCULATES MEAN AND DRAWS A LINE
#dots represent the mean.
Out[624]:
<seaborn.axisgrid.FacetGrid at 0x16e437b2b88>
In [625]:
sns.lmplot(x='size',y='tip',data=tips,x_estimator=np.median)
#Similarly the points represent the median over here.
Out[625]:
<seaborn.axisgrid.FacetGrid at 0x16e4380cd08>
In [626]:
data=sns.load_dataset('anscombe')
data.head()
Out[626]:
dataset x y
0 I 10.0 8.04
1 I 8.0 6.95
2 I 13.0 7.58
3 I 9.0 8.81
4 I 11.0 8.33
In [627]:
sns.lmplot(x='x',y='y',data=data)
Out[627]:
<seaborn.axisgrid.FacetGrid at 0x16e438f32c8>
In [628]:
sns.lmplot(x='x',y='y',data=data.query("dataset=='I'"),ci=100,scatter_kws={'s':80})
Out[628]:
<seaborn.axisgrid.FacetGrid at 0x16e43982bc8>
In [629]:
sns.lmplot(x='x',y='y',data=data.query("dataset=='I'"),ci=None,scatter_kws={'s':80})
#NO CONFIDENCE INTERVALS
Out[629]:
<seaborn.axisgrid.FacetGrid at 0x16e4397f5c8>
In [630]:
sns.lmplot(x='x',y='y',data=data.query("dataset=='II'"),order=2,ci=None,scatter_kws={'s':80})
#HERE we can see the relationship is not linear but polynomial, so we fit a polynomial relationship.
#ORDER=2 #order sets the degree of the polynomial that is fitted.
Out[630]:
<seaborn.axisgrid.FacetGrid at 0x16e43a49088>
In [631]:
sns.lmplot(x='x',y='y',data=data.query("dataset=='III'"),ci=None,scatter_kws={'s':80})
#HERE WE CAN SEE AN OUTLIER EXISTS AND BECAUSE OF THAT THE BEST FIT LINE HAS SHIFTED.
#TO AVOID THAT USE ROBUST=TRUE[IT IGNORES OUTLIERS AND FITS BEST FIT LINE]
Out[631]:
<seaborn.axisgrid.FacetGrid at 0x16e43abdec8>
In [632]:
sns.lmplot(x='x',y='y',data=data.query("dataset=='II'"),order=3)
C:\Users\91733\Anaconda3\lib\site-packages\seaborn\regression.py:237: RankWarning: Polyfit may be poorly conditioned
  return np.polyval(np.polyfit(_x, _y, order), grid)
Out[632]:
<seaborn.axisgrid.FacetGrid at 0x16e43b35a88>
In [633]:
sns.lmplot(x='x',y='y',data=data.query("dataset=='III'"),robust=True,ci=None,scatter_kws={'s':80})
#Robust fits perfect straight line for linear relationship and ignores outliers
Out[633]:
<seaborn.axisgrid.FacetGrid at 0x16e43ba0c08>
In [634]:
sns.lmplot(x='total_bill',y='tip',data=tips,hue='sex',markers=['o','x'],col='time',row='smoker')
Out[634]:
<seaborn.axisgrid.FacetGrid at 0x16e43bb4e88>
In [635]:
f,ax=plt.subplots(figsize=(8,4))
sns.regplot(x='total_bill',y='tip',data=tips,ax=ax)
Out[635]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e44fa6f08>
In [636]:
sns.lmplot(x='total_bill',y='tip',data=tips,col='day',col_wrap=2,height=4)
Out[636]:
<seaborn.axisgrid.FacetGrid at 0x16e4501b308>
In [637]:
#CONTROLLING PLOTTED FIGURES AESTHETICS :
 #--figure styling
 #--axes styling
 #--color styling
 #--etc ....  
In [638]:
def sinplot(flip=1):
    """Draw six phase-shifted sine curves on the current matplotlib axes.

    Curve number k (1..6) is sin(x + 0.5*k) scaled by (7 - k), so each
    successive curve is shifted further and has a smaller amplitude.

    Parameters
    ----------
    flip : number, default 1
        Multiplier applied to every curve; -1 mirrors the curves vertically.
    """
    xs = np.linspace(0, 14, 100)
    for step in range(1, 7):
        shifted = np.sin(xs + step * 0.5)
        plt.plot(xs, shifted * (7 - step) * flip)
sinplot(-1)
In [639]:
sns.set_style('whitegrid')
sinplot()
In [640]:
sns.set_style('dark')
sinplot()
In [641]:
sns.set_style('white')
sinplot()
In [642]:
sns.set_style('ticks')
sinplot()
In [643]:
sns.set_style('ticks')
sinplot()
sns.despine()
In [644]:
sns.set_style('ticks')
sinplot()
sns.despine(left=True)
In [645]:
sns.set_style('ticks')
sinplot()
sns.despine(left=True,bottom=True)
In [646]:
sns.axes_style()
Out[646]:
{'axes.facecolor': 'white',
 'axes.edgecolor': '.15',
 'axes.grid': False,
 'axes.axisbelow': True,
 'axes.labelcolor': '.15',
 'figure.facecolor': 'white',
 'grid.color': '.8',
 'grid.linestyle': '-',
 'text.color': '.15',
 'xtick.color': '.15',
 'ytick.color': '.15',
 'xtick.direction': 'out',
 'ytick.direction': 'out',
 'lines.solid_capstyle': 'round',
 'patch.edgecolor': 'w',
 'image.cmap': 'rocket',
 'font.family': ['sans-serif'],
 'font.sans-serif': ['Arial',
  'DejaVu Sans',
  'Liberation Sans',
  'Bitstream Vera Sans',
  'sans-serif'],
 'patch.force_edgecolor': True,
 'xtick.bottom': True,
 'xtick.top': False,
 'ytick.left': True,
 'ytick.right': False,
 'axes.spines.left': True,
 'axes.spines.bottom': True,
 'axes.spines.right': True,
 'axes.spines.top': True}
In [647]:
sns.set_style('ticks',{'axes.grid':True,'xtick.direction':'in'})
sinplot()
sns.despine(left=True,bottom=True)
In [648]:
sns.set_style('ticks',{'axes.grid':True,'xtick.direction':'in'})
sinplot()
sns.despine(left=True,bottom=False)
In [649]:
sns.set_style('darkgrid')
sinplot()
In [650]:
sns.set_style('darkgrid')
sns.set_context('poster')
sinplot()
In [651]:
sns.set_style('darkgrid')
sns.set_context('paper')
sinplot()
In [652]:
sns.set_style('darkgrid')
sns.set_context('talk',font_scale=1.5)
sinplot()
In [653]:
current_palettes=sns.color_palette()
sns.palplot(current_palettes)
In [654]:
current_palettes=sns.color_palette()
sns.palplot(current_palettes)
sns.palplot(sns.color_palette('hls',8))
In [655]:
#SUMMARY:
In [656]:
#distplot-Univariate Analysis
#jointplot-Bivariate Analysis
#pairplot-MULTIVARIATE analysis
In [657]:
import seaborn as sns
In [658]:
df=sns.load_dataset('tips')
df.head()
Out[658]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
In [659]:
# Correlation is only defined for numeric (int/float) columns, so select them
# explicitly; recent pandas raises if the categorical columns are left in.
df.select_dtypes(include='number').corr()
Out[659]:
total_bill tip size
total_bill 1.000000 0.675734 0.598315
tip 0.675734 1.000000 0.489299
size 0.598315 0.489299 1.000000
In [660]:
# Heatmap of the pairwise correlations between the numeric columns only
# (recent pandas raises if corr() is given the categorical columns).
sns.heatmap(df.select_dtypes(include='number').corr(),annot=True)
Out[660]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e46d3ffc8>
In [661]:
#Bivariate Analysis
In [662]:
sns.jointplot(x='tip',y='total_bill',data=df,kind='hex')
Out[662]:
<seaborn.axisgrid.JointGrid at 0x16e46e00188>
In [663]:
sns.jointplot(x='tip',y='total_bill',data=df,kind='reg')
Out[663]:
<seaborn.axisgrid.JointGrid at 0x16e46f212c8>
In [664]:
#Pairplot
In [665]:
sns.pairplot(df)
Out[665]:
<seaborn.axisgrid.PairGrid at 0x16e4708ee48>
In [666]:
sns.pairplot(df,hue='sex',height=5)
Out[666]:
<seaborn.axisgrid.PairGrid at 0x16e470495c8>
In [667]:
#Distplot
In [668]:
sns.distplot(df['tip'])#HERE KDE=TRUE AS DEFAULT so on Y axis it shows density
Out[668]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e482b24c8>
In [669]:
sns.distplot(df['tip'],kde=False,bins=10)#HERE AS KDE IS FALSE ON Y AXIS WE GET COUNT NOW INFORM OF A HISTOGRAM
Out[669]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e487d74c8>
In [670]:
#CATEGORICAL PLOTS
#-boxplot
#violinplot
#countplot
#barplot
In [671]:
#Countplot: give the categorical column as EITHER x (vertical bars) OR y (horizontal bars).
# The column is passed by keyword; newer seaborn releases reject it positionally.
sns.countplot(x='day',data=df)
Out[671]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e4885da88>
In [672]:
sns.countplot(y='day',data=df)
Out[672]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e48899ac8>
In [673]:
#Barplot
sns.barplot(x='total_bill',y='sex',data=df)
Out[673]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e488f0f48>
In [674]:
sns.barplot(x='sex',y='total_bill',data=df)
Out[674]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e48953cc8>
In [675]:
#BOXPLOT
sns.boxplot(x='sex',y='total_bill',data=df)
Out[675]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e48997c48>
In [676]:
sns.boxplot(x='day',y='total_bill',data=df,palette='rainbow')
Out[676]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e48a00bc8>
In [677]:
# Wide-form box plot of the numeric columns. orient must be 'v' or 'h';
# 'r' is not a valid orientation and newer seaborn releases raise on it.
sns.boxplot(data=df,orient='v')
Out[677]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e49a6a148>
In [678]:
sns.boxplot(x='total_bill',y='day',hue='smoker',data=df)
Out[678]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e49af0608>
In [679]:
#VIOLINPLOT: HELPS TO SEE DISTRIBUTION OF DATA IN TERMS OF KERNEL DENSITY ESTIMATION IN THE BOXPLOT
sns.violinplot(x='total_bill',y='day',data=df,palette='rainbow')
Out[679]:
<matplotlib.axes._subplots.AxesSubplot at 0x16e49bed388>
In [ ]:
#THANKYOU BY RAMA VEERA ISAIAH.